 /*
PROGRAM BY: JEFRREY WANG
Last edited 11/25/2019

This program merges CMF-LBD data with 2007 CMF materials trailers

Output is a CMF-MAT match by firm id (cfn, survu_id) and material code (mnaics) in 2007, at cmf_mat_match2007

*/


%include 'yyyy/pdata.sas';

options obs=max;
    

libname ecroot 'xxxx/data';


%macro link_mats(y);
* LOAD MATERIALS DATASET;
%if &y.=1992 %then %do;
data mat&y.(keep = cfn m mc);
    set pdata.cmf&y.mat;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if m~= " ";
    if mc>0; 
run; 

* make sure to collapse to unique cfn and mnaics;
proc means data=mat&y. NWAY NOPRINT;
    class cfn m;
    var mc;
    output out=mat&y. sum=;
run;

data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 sic_cmf naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(sic=sic_cmf));
    if year=&y.;
    if matcfn=1;
run;
%end;



*The values below are from public information in the CMF long forms;
%if &y.=1997 %then %do;
data mat&y.(keep = cfn mnaics mc);
    set pdata.cmf&y.mat;
    if mnaics~="00971000" & mnaics~="00972000" & mnaics~= "00970099" & mnaics~=" ";
    if mc>0; 
    if mnaics~="00999999";
run; 
    
* make sure to collapse to unique cfn and mnaics;
    
proc means data=mat&y. NWAY NOPRINT;
    class cfn mnaics;
    var mc;
    output out=mat&y. sum=;
run;
    
data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics97_census=naics_cmf));
    if year=&y.;
    if matcfn=1;
run;
%end;
    
*The values below are from public information in the CMF long forms;
%if &y. = 2002 %then %do;    
data mat&y.(keep = survu_id mnaics mc rename=(survu_id=cfn));
    set pdata.cmf&y.mat;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if mnaics~="00971000" & mnaics~="00972000" & mnaics~= "00970099" & mnaics~=" ";
    if mc>0; 
    if mnaics~="00999999";
run; 

* make sure to collapse to unique cfn and mnaics;
proc means data=mat&y. NWAY NOPRINT;
    class cfn mnaics;
    var mc;
    output out=mat&y. sum=;
run;

* Pull good plants from estabs2007_clean;
* Only plants with matcfn=1 are included;
* rules out more than half of the firms;
* total now bigger;

data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics02_census=naics_cmf));
    if year=&y.;
    if matcfn=1;
run;
%end;



*The number values below are from public information in the CMF long forms;  

%if &y. = 2007 %then %do;    
data mat&y.(keep = survu_id mnaics mc rename=(survu_id=cfn));
    set pdata.cmf&y.mat;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if usable_mat="U" | usable_mat="C";
    if mnaics~="00971000" & mnaics~="00972000" & mnaics~= "00970099" & mnaics~=" ";
    if mc>0; 
    if mnaics~="00999999";
run; 

* make sure to collapse to unique cfn and mnaics;
proc means data=mat&y. NWAY NOPRINT;
    class cfn mnaics;
    var mc;
    output out=mat&y. sum=;
run;

* Pull good plants from estabs2007_clean;
* Only plants with matcfn=1 are included;


data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics07_census=naics_cmf));
    if year=&y.;
    if matcfn=1;
run;
%end;
    
%if &y. = 2012 %then %do;    
data mat&y.(keep = survu_id mnaics mc rename=(survu_id=cfn));
    set pdata.cmf&y.mat;
    * only keep Usable and Corrected, clean out Not Usable, Deleted, and Illegal;
    if usable_mat="U" | usable_mat="C";
    if mnaics~="00971000" & mnaics~="00972000" & mnaics~= "00970099" & mnaics~=" ";
    if mc>0; 
    if mnaics~="00999999";
run; 

* make sure to collapse to unique cfn and mnaics;
proc means data=mat&y. NWAY NOPRINT;
    class cfn mnaics;
    var mc;
    output out=mat&y. sum=;
run;

* Pull good plants from estabs2007_clean;

data goodplants&y. (keep= year cfn sales firmid cf cp cm cw cr ee emp fk_naics12 naics_cmf bestnaics);
    set ecroot.estabs_all_clean (rename=(naics12_census=naics_cmf));
    if year=&y.;
    if matcfn=1;
run;
%end;
    
* merge mat data set with info from cmf;
data ecroot.cmf_mat_match&y.;
    merge mat&y.(in=a) goodplants&y.(in=b);
    by cfn;
    if a=1 & b=1;
run;
%mend;
******************************;
* merge mat2007 with goodplants;
******************************;

%macro loop1();
  %do y=1992 %to 2012 %by 5;
     %link_mats(&y.);
  %end;
%mend;
%loop1();
    
